# A tibble: 5 × 2
month avg_weight
<date> <dbl>
1 2020-06-01 78.8
2 2020-07-01 219.
3 2020-08-01 473.
4 2020-09-01 1128.
5 2020-10-01 959.
12/09/2024
Dataset: TidyTuesday’s May 28, 2024 release, “Lisa’s Vegetable Garden”.
Plan: Explore harvest trends in 2020 by calculating average monthly harvest weights and visualizing seasonal patterns.
Skills Developed:
# A tibble: 5 × 2
month avg_weight
<date> <dbl>
1 2020-06-01 78.8
2 2020-07-01 219.
3 2020-08-01 473.
4 2020-09-01 1128.
5 2020-10-01 959.
Dataset: TidyTuesday’s May 21, 2024 release, “Carbon Emissions”.
Plan: Summarize average annual carbon emissions and visualize trends over time.
Skills Developed:
# Calculate the Observed Difference in Median Win Percentage ----
# Collapse the data to one median win percentage per location, then difference
# those medians. diff() subtracts each row from the one after it, and
# group_by() + summarise() returns its rows sorted by the grouping key, so the
# sign of the result depends on how the location labels sort.
# NOTE(review): the "(Home - Away)" label below assumes the "Away" row sorts
# ahead of the "Home" row -- confirm against the values stored in `location`.
observed_diff <- combined_median_data |>
  group_by(location) |>
  summarise(median_win_pct = median(median_win_pct, na.rm = TRUE)) |>
  summarise(diff = diff(median_win_pct)) |>
  pull(diff)

cat(
  "Observed Difference in Median Win Percentage (Home - Away):",
  observed_diff,
  "\n"
)
# Console output recorded with the original document (kept as a comment so the
# chunk stays valid R):
#> Observed Difference in Median Win Percentage (Home - Away): 31.16884
# Permutation helper: one draw from the null distribution.
# The location labels are shuffled (without replacement) inside each TEAM, the
# median of median_win_pct is recomputed per shuffled location, and the
# difference between those per-location medians is returned.
calculate_permutation <- function(data) {
  # Reassign the location labels among the rows of each team.
  relabelled <- mutate(
    group_by(data, TEAM),
    location = sample(location, replace = FALSE)
  )

  # Regrouping by location replaces the TEAM grouping set above.
  location_medians <- summarise(
    group_by(relabelled, location),
    median_win_pct = median(median_win_pct, na.rm = TRUE)
  )

  # Same differencing step that is applied to the observed data.
  differenced <- summarise(location_medians, diff = diff(median_win_pct))
  differenced[["diff"]]
}
# Permutation Test ----
num_permutations <- 10000

# seq_len() instead of 1:n so that a count of zero gives an empty sequence
# rather than c(1, 0). The index is not used: every call just reshuffles the
# same data set and returns one permuted difference.
perm_results <- map_dbl(
  seq_len(num_permutations),
  \(i) calculate_permutation(combined_median_data)
)

# Calculate the two-sided p-value
# A finite number of random permutations cannot justify a p-value of exactly
# zero, so the observed statistic is counted as one member of the reference
# set: p = (b + 1) / (B + 1), where b is the number of permuted differences at
# least as extreme as the observed one and B is the number of permutations.
n_extreme <- sum(abs(perm_results) >= abs(observed_diff))
p_value <- (n_extreme + 1) / (num_permutations + 1)
cat("Two-Sided P-value:", p_value, "\n")
# Console output recorded with the original document, which used the
# uncorrected estimator mean(abs(perm_results) >= abs(observed_diff)):
#> Two-Sided P-value: 0
# No permuted difference reached the observed one in that run, so the corrected
# estimator would have reported 1 / 10001 (roughly 1e-04) for it.
-- Query 1: mean absorbance per study, instrument and frequency for the twelve
-- listed studies. Legend_Label is "<authors> et al. N=<count>, <instrument>",
-- where <count> is the number of distinct SubjectNumber/Ear combinations in
-- each group.
SELECT
  Measurements.Identifier,
  PI_Info.AuthorsShortList,
  Measurements.Instrument,
  Measurements.Frequency,
  AVG(Measurements.Absorbance) AS MeanAbsorbance,
  CONCAT(
    PI_Info.AuthorsShortList, ' et al. N=',
    COUNT(DISTINCT CONCAT(Measurements.SubjectNumber, Measurements.Ear)),
    ', ', Measurements.Instrument
  ) AS Legend_Label
FROM Measurements
JOIN PI_Info ON Measurements.Identifier = PI_Info.Identifier
-- 'Feeney_2017' was previously written 'Feeney_207', which does not follow the
-- <author>_<four-digit year> pattern of every other identifier in this list
-- and so would silently drop that study from the result.
WHERE Measurements.Identifier IN (
    'Abur_2014', 'Feeney_2017', 'Groon_2015', 'Lewis_2015',
    'Liu_2008', 'Rosowski_2012', 'Shahnaz_2006', 'Shaver_2013',
    'Sun_2016', 'Voss_1994', 'Voss_2010', 'Werner_2010'
  )
  AND Measurements.Frequency >= 200 -- Apply frequency filter in SQL
GROUP BY
  Measurements.Identifier,
  Measurements.Instrument,
  PI_Info.AuthorsShortList,
  Measurements.Frequency;

-- Query 2: mean absorbance per age category and frequency for Hunter_2016.
SELECT
  Subjects.AgeCategoryFirstMeasurement AS AgeCategory,
  Measurements.Frequency,
  AVG(Measurements.Absorbance) AS MeanAbsorbance
FROM Measurements
-- Match on study as well as subject number so that a Hunter_2016 measurement
-- cannot pair with a same-numbered subject from a different study.
-- NOTE(review): assumes Subjects has an Identifier column, as in the published
-- WAI database schema -- confirm before running.
JOIN Subjects
  ON Measurements.SubjectNumber = Subjects.SubjectNumber
  AND Measurements.Identifier = Subjects.Identifier
WHERE Measurements.Identifier = 'Hunter_2016'
  AND Measurements.Frequency >= 200 -- Apply frequency filter in SQL
GROUP BY Subjects.AgeCategoryFirstMeasurement, Measurements.Frequency;